
import pandas as pd
# File names used by the conversion pipeline (all relative to the CWD).
TOKEN_TXT = "token.txt"    # input: ';'-separated token dump with a header row
TOKEN_CSV = "token.csv"    # intermediate: pandas CSV export of the input
TOKEN_TSV = "token.tsv"    # intermediate: the CSV text with every '"' removed
TOKEN_OUT = "token2.tsv"   # output: first field per line, string lexemes quoted

# Marker that introduces the tag of a string token on a line.
STRING_TAG = "<STRING"


def quote_string_token(field):
    """Wrap the lexeme that precedes ``<STRING`` in double quotes.

    ``field`` is one line's first tab-separated field.  If it contains
    ``<STRING``, everything before the tag (minus one separating space, if
    present) is wrapped in ``"`` and followed by a single space and the tag,
    e.g. ``hello world <STRING>`` -> ``"hello world" <STRING>``.

    Fields without ``<STRING`` are returned unchanged.  The previous code
    tested for the bare substring ``STRING`` and then sliced with the index
    of ``<STRING``, which corrupted the line when the tag was missing or sat
    at column 0, and dropped a character when no space preceded the tag.
    """
    tag_pos = field.find(STRING_TAG)
    if tag_pos == -1:
        return field
    lexeme = field[:tag_pos]
    # Drop only the single separator space between lexeme and tag.
    if lexeme.endswith(" "):
        lexeme = lexeme[:-1]
    return '"' + lexeme + '" ' + field[tag_pos:]


def format_token_lines(lines):
    """Return the output text lines for an iterable of raw input lines.

    For each line the newline is stripped, only the part before the first
    tab is kept, and string tokens are quoted via ``quote_string_token``.
    """
    return [
        quote_string_token(raw.strip("\n").split("\t")[0])
        for raw in lines
    ]


def main():
    """Run the token.txt -> token.csv -> token.tsv -> token2.tsv pipeline."""
    # First convert the ';'-separated input to a .csv file.
    dataset = pd.read_csv(TOKEN_TXT, sep=";", header=0)
    dataset.to_csv(TOKEN_CSV, index=False)

    # Produce the .tsv file: the CSV text with all double quotes removed
    # (the separators themselves are left as pandas wrote them).
    with open(TOKEN_CSV, encoding="utf-8") as src:
        data = src.read().replace('"', "")
    with open(TOKEN_TSV, "w", encoding="utf-8") as dst:
        dst.write(data)

    # Re-add '"' around string lexemes so the output is consistent.
    with open(TOKEN_TSV, "r", encoding="utf-8") as src:
        out_lines = format_token_lines(src)

    # Mode "w" creates the file if missing and truncates it otherwise; the
    # old "r+" open failed when token2.tsv did not exist yet.  Joining with
    # '\n' yields no trailing newline after the last line, matching the
    # previous line-count based logic.
    with open(TOKEN_OUT, "w", encoding="utf-8") as dst:
        dst.write("\n".join(out_lines))


if __name__ == "__main__":
    main()